---
title: "CRASA Judicial Survey Open Ended Data Analysis"
author: "Amanda Austin"
date: "2023-04-14"
output: html_document
---

```{r setup, include=FALSE}
# Keep warnings and package messages out of the knitted report.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
```

```{r}
library(stm)
library(tm)
library(SnowballC)
library(colorspace)
library(ellipsis)
library(quanteda)
library(hunspell)
library(plyr)
library(farver)
library(httpuv)
library(fastmap)
library(mime)
library(stminsights)
library(descr)
# library() stops right away when a package is missing; require() would only
# return FALSE and let the script fail later at the first use of the package.
library(openintro)
library(lattice)
library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(shiny)
library(shinyjs)
library(grid)
library(gridExtra)
library(tidytext)
library(tidyverse)
library(stringr)
library(data.table)
library(magrittr)
library(reshape2)
library(scales)
library(textdata)
library(syuzhet)
library(lubridate)
library(ggthemes)
library(writexl)
# Request the NRC sentiment lexicon. The result is not assigned, so this call
# only prints the lexicon into the chunk output.
# NOTE(review): the lexicon presumably has to be downloaded/cached through
# textdata before a non-interactive knit -- confirm.
get_sentiments("nrc")
```

```{r}
# Working copy of the imported respondent-level table without its first row.
# NOTE(review): row 1 presumably holds a second header line -- confirm against
# the raw export.
mydata <- SurveyUSA_Respondent_Level_Data_26659_U_of_Houston_Legal_Research[-1, ]
```

```{r}
###################################
#Figure 2
###################################
```

```{r}
# Copy the experiment 1 open-ended answer into a short working column.
open1_question <- "Can you please elaborate on why you did or did not trust the [survey's/judge's/algorithm's] advice? Your opinions are very important to us, so we appreciate your honest and thoughtful responses."
mydata$open1 <- mydata[[open1_question]]
```

```{r}
# Pre-process the experiment 1 responses with tm: lower-case the text, then
# strip digits, punctuation and English stop words.
corpus1 <- Corpus(VectorSource(mydata$open1))
corpus1 <- tm_map(corpus1, content_transformer(tolower))
corpus1 <- tm_map(corpus1, removeNumbers)
corpus1 <- tm_map(corpus1, removePunctuation)
corpus1 <- tm_map(corpus1, removeWords, stopwords("en"))
mydata$open1 <- sapply(corpus1, function(x) { PlainTextDocument(x)$content })

# Remove filler tokens as WHOLE words only. The earlier fixed-string removal of
# "na" also deleted those letters inside other words ("criminal" -> "crimil",
# "nature" -> "ture"), and the "(also|can)" pattern was passed with
# fixed = TRUE, so it was searched for literally and never matched.
mydata$open1 <- gsub("\\b(na|also|can)\\b", "", mydata$open1)

# Strip stray symbols left behind by mis-encoded characters.
for (stray_symbol in c("¦", "€", "´")) {
  mydata$open1 <- gsub(stray_symbol, "", mydata$open1, fixed = TRUE)
}

# Collapse spelling variants onto one form. Word boundaries are used instead of
# surrounding spaces so that words at the start or end of a response are
# normalised too. Repairs for words that were only broken by the old "na"
# removal ("crimil", "persol", "iccurate") are no longer needed; note that
# "inaccurate" is now kept as written instead of being turned into "accurate".
open1_word_map <- c(
  surveys = "survey",
  judges = "judge",
  algorithms = "algorithm",
  commits = "commit",
  commited = "commit",
  criminals = "criminal",
  crimes = "crime",
  addiction = "addict",
  addictive = "addict",
  hisher = "his her",
  added = "add",
  dont = "do not",
  didnt = "did not",
  isnt = "is not",
  actingruling = "acting ruling"
)
for (word in names(open1_word_map)) {
  mydata$open1 <- gsub(
    paste0("\\b", word, "\\b"),
    open1_word_map[[word]],
    mydata$open1
  )
}
```

```{r}
# Flag the experiment 1 condition each respondent answered:
# 1 = survey, 2 = judge, 3 = algorithm (NA when that condition has no answer).
mydata$exp1_survey <- ifelse(!is.na(mydata$`Survey: Possession Cocaine, Revised Will Not Re-offend`), 1, NA_real_)
mydata$exp1_judge <- ifelse(!is.na(mydata$`Judge: Possession Cocaine, Revised Will Not Re-offend`), 2, NA_real_)
mydata$exp1_algorithm <- ifelse(!is.na(mydata$`Algorithm: Possession Cocaine, Revised Will Not Re-offend`), 3, NA_real_)

# Take the first non-missing code. Pasting the three flags together produced
# blank-padded labels such as "1  " and "  3"; their sort order (and therefore
# the factor level order) depends on the collation locale, while the plots
# further down label the levels positionally as Survey, Judge, Algorithm.
# coalesce() gives the levels "1", "2", "3" and leaves respondents without any
# condition as NA.
mydata$exp1_groups <- as.factor(
  coalesce(mydata$exp1_survey, mydata$exp1_judge, mydata$exp1_algorithm)
)
```

```{r}
# Convert the answer columns (positions 10 to 105) to numeric. lapply() keeps
# one vector per column; sapply() would collapse to a plain vector if the table
# ever had a single row.
mydata[, 10:105] <- lapply(mydata[, 10:105], as.numeric)

# Advice sources and offences of experiment 1. Names are the short tags used in
# the derived column names, values are the labels used in the survey columns.
exp1_sources <- c(survey = "Survey", judge = "Judge", algorithm = "Algorithm")
exp1_offenses <- c(
  cocaine = "Possession Cocaine",
  cannabis = "Manufacturing Cannabis",
  grandtheft = "Grand Theft",
  meth = "Possession Meth",
  driving = "Driving Revoked",
  childneglect = "Child Neglect",
  disorderly = "Disorderly",
  resisting = "Resisting"
)

# Per source and offence: absolute change between the initial and the revised
# "Will Re-offend" answer, stored as exp1_<source>_<offence>_dist.
for (src in names(exp1_sources)) {
  for (off in names(exp1_offenses)) {
    col_prefix <- paste0(exp1_sources[[src]], ": ", exp1_offenses[[off]], ", ")
    revised <- mydata[[paste0(col_prefix, "Revised Will Re-offend")]]
    initial <- mydata[[paste0(col_prefix, "Will Re-offend")]]
    mydata[[paste0("exp1_", src, "_", off, "_dist")]] <- abs(revised - initial)
  }
}

# Per source: mean change over the eight offences (NA if any offence is NA).
for (src in names(exp1_sources)) {
  item_cols <- paste0("exp1_", src, "_", names(exp1_offenses), "_dist")
  item_values <- lapply(item_cols, function(col) mydata[[col]])
  mydata[[paste0("exp1_", src, "_dist")]] <-
    Reduce(`+`, item_values) / length(exp1_offenses)
}

# One distance per respondent: the value from whichever condition was answered.
# coalesce() replaces the earlier paste()/gsub()/as.numeric() round trip through
# text.
mydata$exp1_dist <- coalesce(
  mydata$exp1_survey_dist,
  mydata$exp1_judge_dist,
  mydata$exp1_algorithm_dist
)
```

```{r}
# Keep only the text, the treatment group and the distance score, then drop
# every row with a missing value in any of the three.
exp1_model_columns <- c("open1", "exp1_groups", "exp1_dist")
mydata1 <- mydata[, exp1_model_columns]
mydata2 <- na.omit(mydata1)
df1 <- data.frame(mydata2)
```

```{r}
# Build the quanteda corpus first: `df_open1` was used below without ever being
# created. This mirrors how the open3 corpus is built from its analysis table.
df_open1 <- corpus(mydata2, text_field = "open1")

tokens_open1 <- tokens(df_open1, remove_punct = TRUE, remove_numbers = TRUE)
tokens_open1 <- tokens_remove(tokens_open1, stopwords("english"))

# Extra low-information words, removed in a single pass.
open1_extra_stopwords <- c(
  "also", "based", "unless", "already", "can", "lot", "even", "might", "get",
  "sometimes", "may", "within", "ive", "someone", "everyone", "ture",
  "whether", "many"
)
tokens_open1 <- tokens_remove(tokens_open1, open1_extra_stopwords)

# Punctuation and numbers were already handled in tokens(); the former
# remove_punct = FALSE / remove_numbers = FALSE arguments to dfm() changed
# nothing and are not dfm() arguments, so they are dropped.
dfm_open1 <- dfm(tokens_open1)
dfm_open1 <- dfm_trim(dfm_open1, min_termfreq = 5, min_docfreq = 2)
```


```{r}
# Prepare the experiment 1 responses for stm.
documents <- mydata2$open1

# `metadata_df` is never created in this file. The documents come from
# mydata2, which also holds the covariates used in the prevalence formulas
# (exp1_groups, exp1_dist), so it is passed as the document metadata.
# NOTE(review): if a separate metadata table was intended, substitute it here.
processed_data <- textProcessor(
  documents = documents,
  metadata = as.data.frame(mydata2),
  stem = FALSE
)

out <- prepDocuments(processed_data$documents, processed_data$vocab, processed_data$meta)

# Convenience copies of the prepared pieces.
documents <- out$documents
vocab <- out$vocab
metadata <- out$meta
```
```{r}
# Candidate numbers of topics to compare for the experiment 1 responses.
K <- as.numeric(2:8)
topic_search_open1 <- stm::searchK(
  out$documents,
  out$vocab,
  K,
  data = out$meta,
  prevalence = ~ exp1_groups*exp1_dist
)
```

```{r}
# Diagnostics for every candidate K.
plot.searchK(topic_search_open1)
```

```{r}
# Repeated two-topic fits from different starting values.
open1_model_Select <- selectModel(
  out$documents,
  out$vocab,
  K = 2,
  prevalence = ~ exp1_groups*exp1_dist,
  max.em.its = 100,
  data = out$meta,
  runs = 100,
  seed = 6
)
```

```{r}
# Compare the retained runs.
plotModels(
  open1_model_Select,
  pch = c(1, 2, 3, 4),
  legend.position = "bottomright"
)
```

```{r, message=FALSE, warning=FALSE, results='hide'}
# First retained run.
open1_model <- open1_model_Select$runout[[1]]
```

```{r}
# Two-topic model with spectral initialisation; this is the fit used below.
topics_open1 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  data = out$meta,
  K = 2,
  prevalence = ~  exp1_groups*exp1_dist,
  init.type = "Spectral"
)
```

```{r}
# Top words per topic.
labelTopics(topics_open1)
```
```{r}
# Word-by-topic probabilities (beta), reduced to the ten most probable words
# of each topic and ordered within topic for plotting.
td_beta_open1 <- tidy(topics_open1) %>%
    group_by(topic) %>%
    top_n(10, beta) %>%
    ungroup() %>%
    mutate(
        topic = paste0("Topic ", topic),
        term = reorder_within(term, beta, topic)
    )

# Shared text styling for the figure.
figure_2a_text_theme <- theme(
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 0),
    axis.text.y = element_text(size = 18, face = "bold"),
    strip.text = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
)

# Horizontal bar chart, one panel per topic.
td_beta_open1_plot <- ggplot(td_beta_open1, aes(x = term, y = beta)) +
    geom_col(alpha = 0.8, show.legend = FALSE) +
    facet_wrap(~ topic, scales = "free_y") +
    coord_flip() +
    theme_grey() +
    scale_x_reordered() +
    labs(
        x = NULL,
        y = expression(beta),
        title = "(A) Highest Word Probabilities By Topic"
    ) +
    theme_classic(base_size = 25) +
    figure_2a_text_theme

print(td_beta_open1_plot)

ggsave(
    "C:/Users/Figure_2A.png",
    plot = td_beta_open1_plot,
    width = 9,
    height = 7
)
```

```{r}
# Regress both topic proportions on treatment group, distance and their
# interaction; the group variable has to be a factor.
out$meta$exp1_groups <- as.factor(out$meta$exp1_groups)
open1_prep <- estimateEffect(
  1:2 ~ exp1_groups*exp1_dist,
  topics_open1,
  out$meta,
  uncertainty = "Global"
)
summary(open1_prep, topics = 1)
summary(open1_prep, topics = 2)
```
```{r}
# Point estimates of topic proportions per treatment group.
treatment_open1 <- get_effects(
  estimates = open1_prep,
  variable = 'exp1_groups',
  type = 'pointestimate'
)
```

```{r}
# Point-and-error-bar plot of one topic's estimated proportion per treatment
# group (groups on the vertical axis after the flip).
plot_open1_treatment <- function(topic_number) {
  treatment_open1 %>%
    filter(topic == topic_number) %>%
    ggplot(aes(x = value, y = proportion)) +
    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1, size = 1) +
    geom_point(size = 3) +
    coord_flip() +
    theme_classic(base_size = 25) +
    labs(
      x = 'Treatment Group',
      y = paste('Topic', topic_number, 'Proportion')
    ) +
    scale_x_discrete(labels = c("Survey", "Judge", "Algorithm")) +
    scale_y_continuous(limits = c(0, 0.75))
}

plot_open1_treatment_topic1 <- plot_open1_treatment(1)
```

```{r}
plot_open1_treatment_topic2 <- plot_open1_treatment(2)
```

```{r}
# Bold text and wider margins, applied identically to both panels.
figure_2b_theme <- theme(
  title = element_text(face = "bold"),
  axis.text = element_text(face = "bold"),
  axis.title = element_text(face = "bold"),
  plot.margin = margin(1, 2, 1, 2, "cm")
)

plot_open1_treatment_topic1 <- plot_open1_treatment_topic1 + figure_2b_theme
plot_open1_treatment_topic2 <- plot_open1_treatment_topic2 + figure_2b_theme

# Place the two panels side by side.
p_2b <- grid.arrange(
  plot_open1_treatment_topic1,
  plot_open1_treatment_topic2,
  ncol = 2,
  nrow = 1
)

p_2b

ggsave("C:/Users/Figure_2B.png", plot = p_2b, width = 12, height = 7)
```
```{r}
# Estimated topic proportions along the distance measure.
distance_open1 <- get_effects(
  estimates = open1_prep,
  variable = 'exp1_dist',
  type = 'continuous'
)
```

```{r}
# Plain character copy of the cleaned responses kept for analysis.
open1 <- as.character(mydata2[["open1"]])
```

```{r}
# Frequency of each sentiment label per scoring tool.
# `df1nlp` is not created in this file; it has to exist in the session.
table(df1nlp[["stanza_edited"]])
table(df1nlp[["textblob_discrete_edited"]])
table(df1nlp[["vader_discrete_edited"]])
```

```{r}
# Hard-coded label counts, three per tool, arranged as a 3 x 3 matrix with one
# column per tool (Stanza, Textblob, Vader) and one row per subgroup (A, B, C).
sentiment_counts <- c(432, 549, 38, 252, 427, 340, 441, 294, 284)

gfg <- matrix(
  sentiment_counts,
  nrow = 3,
  dimnames = list(LETTERS[1:3], c("Stanza", "Textblob", "Vader"))
)
```

```{r}
# Cross-tabulate treatment group against the Stanza sentiment label.
# The group labels are taken from df1, not mydata: the df1nlp columns are later
# copied into df1 row for row, so df1nlp lines up with the filtered analysis
# rows, whereas mydata still contains every respondent.
table_open1_stanza_chi <- table(df1$exp1_groups, df1nlp$stanza_edited)
chisq.test(table_open1_stanza_chi)
```

```{r}
# Same test for the TextBlob sentiment label.
table_open1_textblob_chi <- table(df1$exp1_groups, df1nlp$textblob_discrete_edited)
chisq.test(table_open1_textblob_chi)
```
```{r}
# Attach the three sentiment scores to the analysis rows.
df1$stanza_edited <- df1nlp$stanza_edited
df1$vader_edited <- df1nlp$vader_edited
df1$textblob_edited <- df1nlp$textblob_edited

# Composite score: unweighted mean of the three tools.
df1$composite <- with(df1, (stanza_edited + vader_edited + textblob_edited) / 3)

# Five equal-width bins over [-1, 1], coded 1 to 5.
breaks <- seq(from = -1, to = 1, length.out = 6)
df1$composite.factor <- cut(
  df1$composite,
  breaks,
  labels = FALSE,
  include.lowest = TRUE
)

# Treatment group by sentiment bin, with a chi-square test of independence.
table_open1_composite_chi <- table(df1$exp1_groups, df1$composite.factor)
table_open1_composite_chi
chisq.test(table_open1_composite_chi)

# Bar colours for the three treatment groups.
colors2 <- c("darkgreen", "darkblue", "red")
```

```{r}
# Figure 2C: share of responses per sentiment bin and treatment group.
#
# The graphical parameters are set in the same chunk that draws the plot:
# settings made with par() in a separate chunk are not carried over to the
# next chunk's graphics device, so they never reached the bar chart. The
# previous values are restored at the end.
old_par <- par(
  mar = c(5, 8, 4, 2),
  font.lab = 2,
  font.axis = 2,
  font.main = 2,
  font.sub = 2,
  cex.lab = 1.5,
  cex.axis = 1.2
)

# cex.names is an argument of barplot(), not a par() setting, so it is passed
# here instead.
plot2C <- barplot(
  (table_open1_composite_chi) / nrow(df1),
  ylab = "Proportion of Responses",
  names.arg = c("Very Negative", "Negative", "Neutral", "Positive", "Very Positive"),
  col = colors2,
  beside = TRUE,
  cex.names = 1.2
)

legend("topright",
       legend = c("Survey", "Judge", "Algorithm"),
       fill = colors2,
       title = "Experimental Groups",
       text.font = 2,
       cex = 1.2,
       inset = c(0.01, 0.01))

# barplot() returns the bar midpoints; this prints them.
plot2C

par(old_par)
```






```{r}
###################################
#Figure 4
###################################
```

```{r}
# Copy the experiment 2 open-ended answer into a short working column.
open3_question <- "Why did you think the [anonymous informant’s/eyewitness's/facial recognition program's] information was sufficient or insufficient to issue a warrant? Please share your thoughts with us."
mydata$open3 <- mydata[[open3_question]]
```

```{r}
# Pre-process the experiment 2 responses with tm: lower-case the text, then
# strip digits, punctuation and English stop words.
corpus3 <- Corpus(VectorSource(mydata$open3))
corpus3 <- tm_map(corpus3, content_transformer(tolower))
corpus3 <- tm_map(corpus3, removeNumbers)
corpus3 <- tm_map(corpus3, removePunctuation)
corpus3 <- tm_map(corpus3, removeWords, stopwords("en"))
mydata$open3 <- sapply(corpus3, function(x) { PlainTextDocument(x)$content })

# Remove "na" answers as a WHOLE word only. The earlier fixed-string removal
# also deleted those letters inside other words ("criminal" -> "crimil",
# "reasonable" -> "reasoble"), which then needed one-off repairs.
mydata$open3 <- gsub("\\bna\\b", "", mydata$open3)

# Strip stray symbols left behind by mis-encoded characters.
for (stray_symbol in c("¦", "€", "´")) {
  mydata$open3 <- gsub(stray_symbol, "", mydata$open3, fixed = TRUE)
}

# Collapse spelling variants onto one form. Word boundaries are used instead of
# surrounding spaces so that words at the start or end of a response are
# normalised too. Repairs for words that were only broken by the old "na"
# removal ("crimil", "persol", "reasoble") are no longer needed.
open3_word_map <- c(
  surveys = "survey",
  judges = "judge",
  algorithms = "algorithm",
  commits = "commit",
  commited = "commit",
  criminals = "criminal",
  crimes = "crime",
  addiction = "addict",
  addictive = "addict",
  hisher = "his her",
  added = "add",
  actingruling = "acting ruling",
  werent = "were not",
  eyewitness = "witness",
  id = "identification",
  dont = "do not",
  didnt = "did not",
  isnt = "is not"
)
for (word in names(open3_word_map)) {
  mydata$open3 <- gsub(
    paste0("\\b", word, "\\b"),
    open3_word_map[[word]],
    mydata$open3
  )
}
```

```{r}
# Flag the experiment 2 condition each respondent answered:
# 1 = informant, 2 = eyewitness, 3 = facial recognition.
mydata$exp2_informant <- ifelse(is.na(mydata$`Informant warrant`), NA, 1)
mydata$exp2_eyewitness <- ifelse(is.na(mydata$`Eyewitness warrant`), NA, 2)
mydata$exp2_facial <- ifelse(is.na(mydata$`Facial recognition warrant`), NA, 3)

# First non-missing flag becomes the treatment group factor.
mydata$exp2_groups <- as.factor(
  coalesce(mydata$exp2_informant, mydata$exp2_eyewitness, mydata$exp2_facial)
)
```

```{r}
# Keep a warrant answer only when it is exactly one of the codes "1" to "5";
# anything else (including missing answers) becomes NA. Returns numeric.
warrant_rating <- function(answer) {
  answer_text <- as.character(answer)
  is_valid_code <- answer_text %in% c("1", "2", "3", "4", "5")
  as.numeric(ifelse(is_valid_code, answer_text, NA))
}

mydata$informant <- warrant_rating(mydata$`Informant warrant`)

mydata$eyewitness <- warrant_rating(mydata$`Eyewitness warrant`)

mydata$facial <- warrant_rating(mydata$`Facial recognition warrant`)
```

```{r}
# One warrant rating per respondent: the rating from whichever condition was
# answered, NA when none was. The three inputs only hold 1 to 5 or NA.
exp2_rating <- coalesce(mydata$informant, mydata$eyewitness, mydata$facial)
mydata$exp2_results <- as.numeric(exp2_rating)
```

```{r}
# Keep only the text, the treatment group and the warrant rating, then drop
# every row with a missing value in any of the three.
exp2_model_columns <- c("open3", "exp2_groups", "exp2_results")
mydata5 <- mydata[, exp2_model_columns]

mydata6 <- na.omit(mydata5)
df3 <- data.frame(mydata6)
```

```{r}
# quanteda corpus of the experiment 2 responses; the remaining columns of
# mydata6 travel along as document variables.
df_open3 <- corpus(mydata6, text_field = 'open3')

# Tokenise, then drop three extra filler words and the English stop words.
tokens_open3 <- df_open3 %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(c("can", "may", "sometimes")) %>%
  tokens_remove(stopwords("english"))

# Document-feature matrix restricted to terms that occur at least five times
# and in at least two documents.
dfm_open3 <- tokens_open3 %>%
  dfm(remove_punct = FALSE, remove_numbers = FALSE) %>%
  dfm_trim(min_termfreq = 5, min_docfreq = 2)

# Hand over to stm (documents, vocab, meta).
out <- quanteda::convert(dfm_open3, to = 'stm')
```

```{r}
# Candidate numbers of topics to compare for the experiment 2 responses.
K <- as.numeric(2:8)
topic_search_open3 <- stm::searchK(
  out$documents,
  out$vocab,
  K,
  data = out$meta,
  prevalence = ~ as.factor(exp2_groups)*as.numeric(exp2_results)
)
```

```{r}
# Diagnostics for every candidate K.
plot.searchK(topic_search_open3)
```

```{r}
# Repeated three-topic fits from different starting values.
open3_model_Select <- selectModel(
  out$documents,
  out$vocab,
  K = 3,
  prevalence = ~ exp2_groups*exp2_results,
  max.em.its = 100,
  data = out$meta,
  runs = 100,
  seed = 6
)
```

```{r}
# Compare the retained runs.
plotModels(
  open3_model_Select,
  pch = c(1, 2, 3, 4),
  legend.position = "bottomright"
)
```

```{r, message=FALSE, warning=FALSE, results='hide'}
# Fourth retained run.
open3_model <- open3_model_Select$runout[[4]]
```

```{r}
# Three-topic model with spectral initialisation; this is the fit used below.
topics_open3 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  data = out$meta,
  K = 3,
  prevalence = ~ as.factor(exp2_groups)*as.numeric(exp2_results),
  init.type = "Spectral"
)
```

```{r}
# Top words per topic.
labelTopics(topics_open3)
```

```{r}
# Word-by-topic probabilities (beta) of the experiment 2 model.
# `td_beta_open3` was used below without ever being created; it is built here
# the same way as its experiment 1 counterpart.
td_beta_open3 <- tidy(topics_open3)

# Figure 4A: the ten most probable words of each topic, one panel per topic.
Figure_4A <- td_beta_open3 %>%
    group_by(topic) %>%
    top_n(10, beta) %>%
    ungroup() %>%
    mutate(topic = paste0("Topic ", topic),
           term = reorder_within(term, beta, topic)) %>%
    ggplot(aes(term, beta)) +
    geom_col(alpha = 0.8, show.legend = FALSE) +
    facet_wrap(~ topic, scales = "free_y") +
    coord_flip() +
    scale_x_reordered() +
    theme_classic(base_size = 10) +
    labs(x = NULL,
         y = expression(beta),
         title = "Highest Word Probabilities By Topic") +
    # No trailing comma after the last element: an empty final argument is not
    # accepted by every ggplot2 version.
    theme(
        axis.text.y = element_text(size = 18, face = "bold"),
        axis.text.x = element_text(size = 12, face = "bold"),
        axis.title = element_text(size = 14, face = "bold"),
        strip.text = element_text(size = 14, face = "bold"),
        plot.title = element_text(size = 16, face = "bold")
    )

Figure_4A

ggsave("C:/Users/Figure_4A.png", plot = Figure_4A, width = 10, height = 7)
```

```{r}
# Regress the three topic proportions on warrant rating, treatment group and
# their interaction; rating must be numeric and group a factor.
out$meta$exp2_results <- as.numeric(out$meta$exp2_results)
out$meta$exp2_groups <- as.factor(out$meta$exp2_groups)
open3_prep <- estimateEffect(
  1:3 ~ exp2_results*exp2_groups,
  topics_open3,
  out$meta,
  uncertainty = "Global"
)
```

```{r}
# Point estimates of topic proportions per treatment group.
treatment_open3 <- get_effects(
  estimates = open3_prep,
  variable = 'exp2_groups',
  type = 'pointestimate'
)
```

```{r}
# Bold text styling shared by the three treatment panels.
open3_treatment_theme <- theme(
  axis.title = element_text(face = "bold"),
  axis.text = element_text(face = "bold"),
  axis.text.x = element_text(face = "bold"),
  axis.text.y = element_text(face = "bold"),
  plot.title = element_text(face = "bold"),
  plot.subtitle = element_text(face = "bold")
)

# Point-and-error-bar plot of one topic's estimated proportion per treatment
# group (groups on the vertical axis after the flip).
plot_open3_treatment <- function(topic_number) {
  treatment_open3 %>%
    filter(topic == topic_number) %>%
    ggplot(aes(x = value, y = proportion)) +
    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1, size = 1) +
    geom_point(size = 1.5) +
    coord_flip() +
    theme_classic(base_size = 12) +
    labs(
      x = 'Treatment Group',
      y = paste('Topic', topic_number, 'Proportion')
    ) +
    scale_x_discrete(
      labels = c('Informant', 'Eye \n Witness', 'Facial \n Recognition')
    ) +
    scale_y_continuous(limits = c(0, 0.75)) +
    open3_treatment_theme
}

plot_open3_treatment_topic1 <- plot_open3_treatment(1)

plot_open3_treatment_topic2 <- plot_open3_treatment(2)

plot_open3_treatment_topic3 <- plot_open3_treatment(3)

# Three panels in one row under a common bold heading.
p_4b <- grid.arrange(
  plot_open3_treatment_topic1,
  plot_open3_treatment_topic2,
  plot_open3_treatment_topic3,
  ncol = 3,
  nrow = 1,
  top = textGrob(
    "Topic Proportions by Treatment Group",
    gp = gpar(fontface = "bold")
  )
)

ggsave("C:/Users/Figure_4B.png", plot = p_4b, width = 12, height = 7)
```

```{r}
# Effect of the first model variable on topic proportions, evaluated once for
# each value (1, 2, 3) of the second model variable as moderator, stacked into
# one table.
interaction_open3 <- bind_rows(
  lapply(c(1, 2, 3), function(moderator_value) {
    get_effects(
      estimates = open3_prep,
      variable = open3_prep$varlist[1],
      type = 'continuous',
      moderator = open3_prep$varlist[2],
      modval = moderator_value
    )
  })
)
```

```{r}
# Label column: 'exp2_groups' for rows of topics 1 to 3 whose moderator entry
# equals 'exp2_results'; NA for every other row.
interaction_open3 <- interaction_open3 %>%
  mutate(
    cus.labels = case_when(
      topic == 1 & moderator == 'exp2_results' ~ 'exp2_groups',
      topic == 2 & moderator == 'exp2_results' ~ 'exp2_groups',
      topic == 3 & moderator == 'exp2_results' ~ 'exp2_groups'
    )
  )
```

```{r}
# Bold text styling shared by the three interaction panels.
open3_interaction_theme <- theme(
  axis.title = element_text(face = "bold"),
  axis.text = element_text(face = "bold"),
  axis.text.y = element_text(face = "bold"),
  axis.text.x = element_text(face = "bold"),
  plot.title = element_text(face = "bold"),
  plot.subtitle = element_text(face = "bold"),
  legend.title = element_text(face = "bold"),
  legend.text = element_text(face = "bold")
)

# Line-and-ribbon plot of one topic's estimated proportion along the effect
# variable, with one line and confidence band per moderator value.
plot_open3_interaction <- function(topic_number) {
  interaction_open3 %>%
    filter(topic == topic_number) %>%
    mutate(moderator = as.factor(moderator)) %>%
    ggplot(aes(x = value, y = proportion, group = moderator, fill = moderator)) +
    geom_line() +
    geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
    theme_classic(base_size = 15) +
    labs(
      x = 'Likelihood',
      y = paste('Topic', topic_number, 'Proportion'),
      color = 'Treatment',
      group = 'Treatment',
      fill = 'Treatment'
    ) +
    scale_fill_manual(
      values = c('darkred', 'darkgreen', 'darkblue'),
      labels = c('Informant', 'Eye Witness', 'Facial Recognition')
    ) +
    open3_interaction_theme
}

plot_open3_interaction_topic1 <- plot_open3_interaction(1)

plot_open3_interaction_topic2 <- plot_open3_interaction(2)

plot_open3_interaction_topic3 <- plot_open3_interaction(3)

# Three panels stacked vertically under a common bold heading.
p_4c <- grid.arrange(
  plot_open3_interaction_topic1,
  plot_open3_interaction_topic2,
  plot_open3_interaction_topic3,
  ncol = 1,
  nrow = 3,
  top = textGrob(
    "Topic Proportions by Treatment and by Likelihood of Issuing a Search Warrant",
    gp = gpar(fontface = "bold")
  )
)

ggsave(
  "C:/Users/Figure_4C.png",
  plot = p_4c,
  width = 10,
  height = 7
)
```

```{r}
# Working copy of the experiment 2 sentiment table.
# `df3nlp` is not created in this file; it has to exist in the session.
nlp3 <- df3nlp

# Five equal-width bins over [-1, 1], coded 1 to 5.
breaks <- seq(from = -1, to = 1, length.out = 6)
nlp3$composite.factor <- cut(
  nlp3$composite,
  breaks,
  labels = FALSE,
  include.lowest = TRUE
)
```

```{r}
# Treatment group by sentiment bin, with a chi-square test of independence.
table_open3_composite_chi <- table(nlp3$exp2_groups, nlp3$composite.factor)
chisq.test(table_open3_composite_chi)

# Figure 4D: share of all responses per sentiment bin and treatment group.
# Bar colours come from `colors2`, defined in the Figure 2 section.
sentiment_bin_labels <- c(
  "Very Negative", "Negative", "Neutral", "Positive", "Very Positive"
)

Figure_4D <- barplot(
  (table_open3_composite_chi) / nrow(nlp3),
  ylab = "Proportion of Responses",
  names.arg = sentiment_bin_labels,
  col = colors2,
  beside = TRUE,
  font.axis = 2,
  font.lab = 2,
  font.main = 2,
  font.sub = 2,
  ylim = c(0, 0.2)
)

legend(
  "topright",
  legend = c("Informant", "Eye Witness", "Facial Recognition"),
  fill = colors2,
  title = "Experimental Groups",
  text.font = 2
)

# barplot() returns the bar midpoints; this prints them.
Figure_4D
```









```{r}
###################################
#Figure 6
###################################
```

```{r}
# Copy the sentencing-explanation question into a short working column.
# The source column name ends with a trailing space.
mydata[["open4"]] <- mydata[["Please explain how you arrived at the sentence that you did. Your detailed and thoughtful response is important. "]]
```

```{r}
# Pre-process the open-ended sentencing explanations with tm: lower-case the
# text, then strip digits, punctuation and English stop words.
corpus4 <- Corpus(VectorSource(mydata$open4))
corpus4 <- tm_map(corpus4, content_transformer(tolower))
corpus4 <- tm_map(corpus4, removeNumbers)
corpus4 <- tm_map(corpus4, removePunctuation)
corpus4 <- tm_map(corpus4, removeWords, stopwords("en"))

# Pull the cleaned text back out; vapply() enforces one string per response.
mydata$open4 <- vapply(
  corpus4,
  function(doc) PlainTextDocument(doc)$content,
  character(1)
)

# Blank out stand-alone "na" placeholders only. Deleting every "na" substring
# (the previous approach) also cut it out of real words, e.g. "criminal" ->
# "crimil", "personal" -> "persol", "reasonable" -> "reasoble", which then had
# to be patched back one word at a time (and "persol" was patched to "person"
# rather than "personal"). Those repair rules are no longer needed.
mydata$open4 <- gsub("\\bna\\b", "", mydata$open4, ignore.case = TRUE)

# Strip stray symbol characters wherever they occur.
for (stray_chr in c("¦", "€", "´")) {
  mydata$open4 <- gsub(stray_chr, "", mydata$open4, fixed = TRUE)
}

# Collapse word variants to a single form. Word-boundary patterns also catch a
# variant at the very start or end of a response and back-to-back variants,
# which a literal " word " pattern misses.
# "criminals" replaces the old "crimils" rule, which normalised the plural.
variants_open4 <- c(
  surveys = "survey",
  judges = "judge",
  algorithms = "algorithm",
  commits = "commit",
  commited = "commit",
  criminals = "criminal",
  crimes = "crime",
  addiction = "addict",
  addictive = "addict",
  hisher = "his her",
  added = "add",
  actingruling = "acting ruling",
  werent = "were not",
  id = "identification",
  dont = "do not",
  didnt = "did not",
  isnt = "is not",
  sentencing = "sentence",
  priors = "prior"
)
for (variant in names(variants_open4)) {
  mydata$open4 <- gsub(
    paste0("\\b", variant, "\\b"),
    variants_open4[[variant]],
    mydata$open4
  )
}
```

```{r}
# Flag which sentencing-advice column each respondent answered (codes 1-4,
# NA when that column is empty), then merge the flags into one factor by taking
# the first non-missing code per row.
mydata$exp3_prosecutor <- ifelse(is.na(mydata[["Sentencing: Prosecutors"]]), NA, 1)
mydata$exp3_judge <- ifelse(is.na(mydata[["Sentencing: Judge"]]), NA, 2)
mydata$exp3_algorithm <- ifelse(is.na(mydata[["Sentencing: Algorithm"]]), NA, 3)
mydata$exp3_probation <- ifelse(is.na(mydata[["Sentencing: Probation"]]), NA, 4)
mydata$exp3_groups <- as.factor(
  coalesce(
    mydata$exp3_prosecutor,
    mydata$exp3_judge,
    mydata$exp3_algorithm,
    mydata$exp3_probation
  )
)
```

```{r}
# Numeric copies of the four sentencing answer columns under short names.
mydata[["prosecutor"]] <- as.numeric(mydata[["Sentencing: Prosecutors"]])
mydata[["judge"]] <- as.numeric(mydata[["Sentencing: Judge"]])
mydata[["algorithm"]] <- as.numeric(mydata[["Sentencing: Algorithm"]])
mydata[["probation"]] <- as.numeric(mydata[["Sentencing: Probation"]])
```

```{r}
# Sentence given by each respondent: the first non-missing value across the
# four treatment columns, kept as a number (NA when all four are missing).
# The earlier paste() / gsub("NA") / as.numeric() round trip was dropped: it
# only turned NA into "NA", then "", then NA again, and left every other value
# unchanged.
mydata$exp3_results <- as.numeric(
  coalesce(mydata$prosecutor, mydata$judge, mydata$algorithm, mydata$probation)
)
```

```{r}
# Keep only the text, treatment group and sentence outcome, then drop every
# row with an NA in any of the three.
cols_open4 <- c("open4", "exp3_groups", "exp3_results")
mydata7 <- mydata[cols_open4]
mydata8 <- na.omit(mydata7)
```

```{r}
# Build the quanteda corpus -> tokens -> dfm pipeline for the sentencing
# explanations and convert it to the documents/vocab/meta list that stm expects.
df_open4 <- corpus(mydata8, text_field = "open4")

# Punctuation and number removal is a tokens() option; filler words and English
# stop words are then removed from the token stream.
tokens_open4 <- tokens(df_open4, remove_punct = TRUE, remove_numbers = TRUE)
tokens_open4 <- tokens_remove(tokens_open4, c("can", "may", "sometimes", "also"))
tokens_open4 <- tokens_remove(tokens_open4, stopwords("english"))

# dfm() on a tokens object no longer takes remove_* arguments (deprecated
# passthrough to tokens()); with FALSE they had no effect, so they are dropped.
dfm_open4 <- dfm(tokens_open4)

# NOTE(review): `out` is reused by the open5 section further down, so chunks
# that read `out` depend on which section was run last.
out <- quanteda::convert(dfm_open4, to = "stm")
```

```{r}
# Candidate numbers of topics (2 through 8) to compare with searchK(), using
# treatment group x sentence outcome as the prevalence covariates.
K <- as.numeric(2:8)
topic_search_open4 <- stm::searchK(
  documents = out$documents,
  vocab = out$vocab,
  K = K,
  data = out$meta,
  prevalence = ~ as.factor(exp3_groups) * as.numeric(exp3_results)
)
```

```{r}
# Diagnostic plots for the searchK() results (dispatches to plot.searchK).
plot(topic_search_open4)
```

```{r}
# Fit multiple two-topic candidate models (runs = 100, fixed seed) so the
# candidates can be compared before choosing one.
open4_model_Select <- selectModel(
  documents = out$documents,
  vocab = out$vocab,
  K = 2,
  prevalence = ~ exp3_groups * exp3_results,
  data = out$meta,
  max.em.its = 100,
  runs = 100,
  seed = 6
)
```

```{r}
# Compare the candidate runs returned by selectModel().
plotModels(
  open4_model_Select,
  pch = c(1, 2, 3, 4),
  legend.position = "bottomright"
)
```

```{r, message=FALSE, warning=FALSE, results='hide'}
# Keep candidate run number 12 from the selectModel() output.
open4_model <- open4_model_Select[["runout"]][[12]]
```

```{r}
# Two-topic structural topic model with spectral initialisation; topic
# prevalence varies with treatment group, sentence outcome and their interaction.
topics_open4 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  K = 2,
  prevalence = ~ as.factor(exp3_groups) * as.numeric(exp3_results),
  data = out$meta,
  init.type = "Spectral"
)
```

```{r}
# Print the words that label each topic of the fitted model.
labelTopics(model = topics_open4)
```

```{r}
# Tidy the fitted model into a long table; the plot below uses its `topic`,
# `term` and `beta` columns.
td_beta_open4 <- tidy(topics_open4)

# Figure 6A: the 15 highest-beta terms of each topic, one facet per topic.
Figure_6A <- td_beta_open4 %>%
  group_by(topic) %>%
  top_n(15, beta) %>%
  ungroup() %>%
  mutate(
    topic = paste0("Topic ", topic),
    # order terms by beta separately inside every facet
    term = reorder_within(term, beta, topic)
  ) %>%
  ggplot(aes(term, beta)) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free_y") +
  coord_flip() +
  scale_x_reordered() +
  labs(
    x = NULL,
    y = expression(beta),
    title = "Highest Word Probabilities By Topic"
  ) +
  # One complete theme only: the theme_grey() that used to sit earlier in the
  # chain was fully replaced by this theme_classic() and had no effect.
  theme_classic(base_size = 20) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    plot.caption = element_text(face = "bold")
  )

Figure_6A

ggsave("C:/Users/Figure_6A.png", plot = Figure_6A, width = 9, height = 7)


```


```{r}
# Make the covariate types explicit in the stm metadata (numeric outcome,
# categorical treatment group) before estimating their effect on both topics.
# NOTE(review): relies on `out` still holding the open4 conversion - confirm
# the open5 section has not overwritten it when re-running this chunk.
out[["meta"]][["exp3_results"]] <- as.numeric(out[["meta"]][["exp3_results"]])
out[["meta"]][["exp3_groups"]] <- as.factor(out[["meta"]][["exp3_groups"]])
open4_prep <- estimateEffect(
  formula = 1:2 ~ exp3_results * exp3_groups,
  stmobj = topics_open4,
  metadata = out[["meta"]],
  uncertainty = "Global"
)
```

```{r}
# Regression tables for the estimated covariate effects on each topic.
summary(object = open4_prep)
```
```{r}
# Extract point estimates of topic proportion for each treatment group in a
# data-frame form suitable for ggplot.
treatment_open4 <- get_effects(
  estimates = open4_prep,
  variable = "exp3_groups",
  type = "pointestimate"
)
```

```{r}
# Figure 6B: point estimate and interval of each topic's proportion for every
# sentencing-advice treatment group, one panel per topic, shown side by side.

# Builds the panel for a single topic. The two panels differ only in the topic
# shown and in the title of the (flipped) treatment-group axis.
make_open4_treatment_panel <- function(topic_id, group_axis_title) {
  treatment_open4 %>%
    filter(topic == topic_id) %>%
    ggplot(aes(x = value, y = proportion)) +
    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.1, size = 1) +
    geom_point(size = 1.5) +
    coord_flip() +
    theme_classic(base_size = 15) +
    labs(x = group_axis_title, y = paste0("Topic ", topic_id, " Proportion")) +
    scale_x_discrete(labels = c('Prosecutor', 'Judge', 'Sentencing Algorithm', 'Probation Officer')) +
    scale_y_continuous(limits = c(0, 0.75)) +
    theme(
      plot.title = element_text(face = "bold"),
      axis.title = element_text(face = "bold"),
      axis.text = element_text(face = "bold"),
      plot.margin = margin(1, 1, 1, 6)
    )
}

plot_open4_treatment_topic1 <- make_open4_treatment_panel(1, 'Treatment Group')
# The second panel uses a blank axis title (a single space).
plot_open4_treatment_topic2 <- make_open4_treatment_panel(2, ' ')

title_6b <- textGrob("Topic Proportions by Treatment Group", gp = gpar(fontface = "bold"))
p_6b <- grid.arrange(
  plot_open4_treatment_topic1,
  plot_open4_treatment_topic2,
  ncol = 2,
  nrow = 1,
  top = title_6b
)
ggsave(
  "C:/Users/ataus/OneDrive/Documents/Amanda/Phd/CRASA/Legal Community Survey/Figure_6B.png",
  plot = p_6b,
  width = 12,
  height = 7
)
```















```{r}
###################################
#Figure 7
###################################
```

```{r}
# Copy the "what do you think of this trend" question into a short working column.
mydata[["open5"]] <- mydata[["As noted earlier in the survey, algorithms are increasingly being used to advise on sentencing and bail decisions. Please let us know, what do you think of this trend?"]]
```

```{r}
# Pre-process the open-ended answers about the algorithm trend with tm:
# lower-case the text, then strip digits, punctuation and English stop words.
corpus5 <- Corpus(VectorSource(mydata$open5))
corpus5 <- tm_map(corpus5, content_transformer(tolower))
corpus5 <- tm_map(corpus5, removeNumbers)
corpus5 <- tm_map(corpus5, removePunctuation)
corpus5 <- tm_map(corpus5, removeWords, stopwords("en"))

# Pull the cleaned text back out; vapply() enforces one string per response.
mydata$open5 <- vapply(
  corpus5,
  function(doc) PlainTextDocument(doc)$content,
  character(1)
)

# Blank out stand-alone "na" placeholders only. Deleting every "na" substring
# (the previous approach) also cut it out of real words, e.g. "criminal" ->
# "crimil", "personal" -> "persol", "reasonable" -> "reasoble", which then had
# to be patched back one word at a time (and "persol" was patched to "person"
# rather than "personal"). Those repair rules are no longer needed.
mydata$open5 <- gsub("\\bna\\b", "", mydata$open5, ignore.case = TRUE)

# Strip stray symbol characters wherever they occur.
for (stray_chr in c("¦", "€", "´")) {
  mydata$open5 <- gsub(stray_chr, "", mydata$open5, fixed = TRUE)
}

# Collapse word variants to a single form. Word-boundary patterns also catch a
# variant at the very start or end of a response and back-to-back variants,
# which a literal " word " pattern misses.
# "criminals" replaces the old "crimils" rule, which normalised the plural.
variants_open5 <- c(
  surveys = "survey",
  judges = "judge",
  algorithms = "algorithm",
  commits = "commit",
  commited = "commit",
  criminals = "criminal",
  crimes = "crime",
  addiction = "addict",
  addictive = "addict",
  hisher = "his her",
  added = "add",
  actingruling = "acting ruling",
  werent = "were not",
  id = "identification",
  dont = "do not",
  didnt = "did not",
  isnt = "is not",
  factors = "factor"
)
for (variant in names(variants_open5)) {
  mydata$open5 <- gsub(
    paste0("\\b", variant, "\\b"),
    variants_open5[[variant]],
    mydata$open5
  )
}
```

```{r}
# Numeric code of each respondent's type of legal work (NA when missing).
# The earlier paste() / gsub("NA") / as.numeric() round trip was dropped: it
# only turned NA into "NA", then "", then NA again, and left every other value
# unchanged.
mydata$legal <- as.numeric(mydata$`Type of Legal Work`)
```

```{r}
# Keep only the text and the legal-work code, then drop rows with an NA in
# either column.
cols_open5 <- c("open5", "legal")
mydata11 <- mydata[cols_open5]
mydata12 <- na.omit(mydata11)
```

```{r}
# Plain data.frame copy of the complete-case open5 data.
df5 <- data.frame(mydata12)
```

```{r}
# Build the quanteda corpus -> tokens -> dfm pipeline for the algorithm-trend
# answers and convert it to the documents/vocab/meta list that stm expects.
df_open5 <- corpus(mydata12, text_field = "open5")

# Punctuation and number removal is a tokens() option; filler words and English
# stop words are then removed from the token stream.
tokens_open5 <- tokens(df_open5, remove_punct = TRUE, remove_numbers = TRUE)
tokens_open5 <- tokens_remove(tokens_open5, c("can", "may", "sometimes", "also"))
tokens_open5 <- tokens_remove(tokens_open5, stopwords("english"))

# dfm() on a tokens object no longer takes remove_* arguments (deprecated
# passthrough to tokens()); with FALSE they had no effect, so they are dropped.
dfm_open5 <- dfm(tokens_open5)

# NOTE(review): this overwrites the `out` object created for the open4
# analysis, so open4 chunks that read `out` must not be re-run after this one.
out <- quanteda::convert(dfm_open5, to = "stm")
```

```{r}
# Candidate numbers of topics (2 through 8) to compare with searchK(), using
# type of legal work (as a categorical covariate) for topic prevalence.
K <- as.numeric(2:8)
topic_search_open5 <- stm::searchK(
  documents = out$documents,
  vocab = out$vocab,
  K = K,
  data = out$meta,
  prevalence = ~ as.factor(legal)
)
```

```{r}
# Diagnostic plots for the searchK() results (dispatches to plot.searchK).
plot(topic_search_open5)
```

```{r}
# Fit multiple two-topic candidate models (runs = 100, fixed seed) so the
# candidates can be compared before choosing one.
# `legal` is a numeric profession code, so it is wrapped in as.factor() to be
# treated as categorical, matching the searchK() and stm() calls for open5.
# A bare `~ legal` would model it as a continuous covariate.
# NOTE(review): the run index picked from this object afterwards was chosen
# under the old specification - re-check it against plotModels().
open5_model_Select <- selectModel(
  out$documents,
  out$vocab,
  K = 2,
  prevalence = ~ as.factor(legal),
  max.em.its = 100,
  data = out$meta,
  runs = 100,
  seed = 6
)
```

```{r}
# Compare the candidate runs returned by selectModel().
plotModels(
  open5_model_Select,
  pch = c(1, 2, 3, 4),
  legend.position = "bottomright"
)
```

```{r, message=FALSE, warning=FALSE, results='hide'}
# Keep candidate run number 3 from the selectModel() output.
open5_model <- open5_model_Select[["runout"]][[3]]
```

```{r}
# Two-topic structural topic model with spectral initialisation; topic
# prevalence varies with type of legal work, treated as categorical.
topics_open5 <- stm(
  documents = out$documents,
  vocab = out$vocab,
  K = 2,
  prevalence = ~ as.factor(legal),
  data = out$meta,
  init.type = "Spectral"
)
```

```{r}
# Print the words that label each topic of the fitted model.
labelTopics(model = topics_open5)
```

```{r}
# Tidy the fitted model into a long table; the plot below uses its `topic`,
# `term` and `beta` columns.
td_beta_open5 <- tidy(topics_open5)

# The 10 highest-beta terms per topic, with terms ordered by beta separately
# inside each facet.
top_terms_open5 <- td_beta_open5 %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  mutate(
    topic = paste0("Topic ", topic),
    term = reorder_within(term, beta, topic)
  )

# Figure 7A: horizontal bars of term probability, one facet per topic.
Figure_7A <- ggplot(top_terms_open5, aes(term, beta)) +
  geom_col(alpha = 0.8, show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free_y") +
  coord_flip() +
  theme_classic(base_size = 15) +
  scale_x_reordered() +
  labs(
    x = NULL,
    y = expression(beta),
    title = "Highest Word Probabilities by Topic"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    strip.text = element_text(face = "bold"),
    plot.margin = margin(1, 1, 1, 1)
  )

print(Figure_7A)

ggsave("C:/Users/Figure_7A.png", plot = Figure_7A)
```

```{r}
# Estimate how topic prevalence differs by type of legal work.
# `legal` arrives in the stm metadata as a numeric code; left numeric,
# estimateEffect() fits one linear slope across the codes. It is converted to a
# factor first so each category gets its own estimate, matching the
# as.factor(legal) prevalence used to fit topics_open5 and the per-category
# point-estimate plots that follow.
out$meta$legal <- as.factor(out$meta$legal)
prep_topic5 <- estimateEffect(
  1:2 ~ legal,
  topics_open5,
  metadata = out$meta,  # spelled out; `meta =` relied on partial matching
  uncertainty = "Global"
)

summary(prep_topic5, topics = 1)
summary(prep_topic5, topics = 2)
```

```{r}
# Bold title, axis labels and tick labels at 1.2x size for this figure.
par(font.main = 2, font.lab = 2, font.axis = 2, cex = 1.2)

legal_labels_topic1 <- c(
  'Law Enforcement', 'Paralegal Student', 'Paralegal',
  'Law Student', 'Lawyer', 'Judge',
  'Not currently in a legal related job'
)

# Point estimates of Topic 1 prevalence for each type of legal work.
# NOTE(review): `model` receives the estimateEffect object rather than the
# fitted stm model - presumably unused with labeltype = "custom"; confirm.
open5_topic1_plot <- plot(
  prep_topic5,
  covariate = "legal",
  topics = c(1),
  model = prep_topic5,
  method = "pointestimate",
  xlab = "STM Estimates",
  main = "Respondents' Propensity to Use Topic 1",
  xlim = c(0, .75),
  labeltype = "custom",
  custom.labels = legal_labels_topic1
)
```

```{r}
# Bold title, axis labels and tick labels at 1.2x size for this figure.
par(font.main = 2, font.lab = 2, font.axis = 2, cex = 1.2)

legal_labels_topic2 <- c(
  'Law Enforcement', 'Paralegal Student', 'Paralegal',
  'Law Student', 'Lawyer', 'Judge',
  'Not currently in a legal related job'
)

# Point estimates of Topic 2 prevalence for each type of legal work.
# NOTE(review): `model` receives the estimateEffect object rather than the
# fitted stm model - presumably unused with labeltype = "custom"; confirm.
open5_topic2_plot <- plot(
  prep_topic5,
  covariate = "legal",
  topics = c(2),
  model = prep_topic5,
  method = "pointestimate",
  xlab = "STM Estimates",
  main = "Respondents' Propensity to Use Topic 2",
  xlim = c(0, .75),
  labeltype = "custom",
  custom.labels = legal_labels_topic2
)
```

```{r}
# Both topic plots side by side on one page (1 row x 2 columns), with bold
# text at 1.2x size.
par(mfrow = c(1, 2), cex = 1.2, font.main = 2, font.lab = 2, font.axis = 2)

legal_labels_panels <- c(
  'Law Enforcement', 'Paralegal Student', 'Paralegal',
  'Law Student', 'Lawyer', 'Judge',
  'Not currently in a legal related job'
)

# Left panel: Topic 1.
plot(
  prep_topic5,
  covariate = "legal",
  topics = c(1),
  model = prep_topic5,
  method = "pointestimate",
  xlab = "STM Estimates",
  main = "Propensity to Use Topic 1",
  xlim = c(-.1, .7),
  labeltype = "custom",
  custom.labels = legal_labels_panels
)

# Right panel: Topic 2.
plot(
  prep_topic5,
  covariate = "legal",
  topics = c(2),
  model = prep_topic5,
  method = "pointestimate",
  xlab = "STM Estimates",
  main = "Propensity to Use Topic 2",
  xlim = c(-.1, .7),
  labeltype = "custom",
  custom.labels = legal_labels_panels
)

# Return to a single-panel layout for later plots.
par(mfrow = c(1, 1), cex = 1.2, font.main = 2, font.lab = 2, font.axis = 2)
```

```{r}
# Cleaned open5 responses as a standalone vector.
text5 <- c(mydata[["open5"]])
```

```{r}
# Cleaned open5 responses with missing entries removed.
open5 <- na.omit(mydata[["open5"]])
```

```{r}
# Frequency of each distinct stanza sentiment value in df5nlp.
df5_stanza <- table(as.factor(df5nlp[["stanza_edited5"]]))
```

```{r}
# Composite sentiment: unweighted mean of the stanza, vader and textblob
# score columns for each response.
df5nlp$composite <- with(
  df5nlp,
  (stanza_edited5 + vader_edited5 + textblob_edited5) / 3
)

# Bin the composite into five equal-width intervals spanning [-1, 1]; with
# labels = FALSE each response receives the integer index of its interval.
breaks <- seq(from = -1, to = 1, length.out = 6)
df5nlp$composite.factor <- cut(
  x = df5nlp$composite,
  breaks = breaks,
  labels = FALSE,
  include.lowest = TRUE
)

# Bold text at 1.2x size, and one fill colour per legend entry.
par(font.main = 2, font.lab = 2, font.axis = 2, cex = 1.2)
colors2 <- c("darkgreen", "darkblue", "red", "yellow", "blue")

# Figure 7C: grouped bars with heights expressed as a share of all rows in
# df5nlp. barplot() returns the bar midpoints, which is what plot_7C stores.
# NOTE(review): table_open5_composite_chi is not created anywhere in the
# visible part of this section - confirm it exists before this chunk runs.
sentiment_bins_7c <- c("Very Negative", "Negative", "Neutral", "Positive", "Very Positive")
profession_names_7c <- c('Law Enforcement', 'Paralegal', 'Lawyer', 'Judge', 'Other')
plot_7C <- barplot(
  height = table_open5_composite_chi / nrow(df5nlp),
  beside = TRUE,
  col = colors2,
  names.arg = sentiment_bins_7c,
  ylab = "Proportion of Responses",
  cex.names = 0.85,
  cex.axis = 0.85,
  legend.text = profession_names_7c,
  args.legend = list(title = "Legal Professions", y.intersp = 0.7, text.font = 1.75, x = "topright", inset = c(0, -0.1))
)
```
